<?php

namespace DataCube\DataCubeAggregation\Functions\Transform;

/**
 * Helpers for partitioning a dataset: hold-out split, k-fold partitioning
 * and random resampling.
 *
 * All methods work on item position, not on array keys, and never shuffle
 * the input themselves.
 */
class Split
{
    /**
     * Hold-out split: cuts the data sequentially into a training part and a test part.
     *
     * Note: despite the method name, no stratification by class/label is
     * performed; the items are taken in their existing order.
     *
     * @param array     $data       Items to split.
     * @param int|float $splitRatio Fraction of the items that goes to the training set.
     * @param float     $testRatio  Fraction of the items that goes to the test set.
     *                              0 (the default) means "everything after the training set".
     *
     * @return array Two-element list: [trainSet, testSet].
     */
    public function stratifiedSplit($data, $splitRatio, float $testRatio = 0.0)
    {
        $totalItems = count($data);

        // Item counts are truncated toward zero by the int cast.
        $splitItems = (int) ($totalItems * $splitRatio);
        $trainSet = array_slice($data, 0, $splitItems);

        // $testRatio is a float so that fractional ratios such as 0.2 are
        // honoured; an int-typed parameter would collapse them to 0 and
        // silently fall into the "rest of the data" branch.
        if ($testRatio !== 0.0) {
            $testItems = (int) ($totalItems * $testRatio);
            $testSet = array_slice($data, $splitItems, $testItems);
        } else {
            $testSet = array_slice($data, $splitItems);
        }

        return [$trainSet, $testSet];
    }

    /**
     * Cross validation: partitions the data into $k consecutive folds.
     *
     * Every item ends up in exactly one fold. When the item count is not
     * divisible by $k, the first (count % $k) folds receive one extra item,
     * so fold sizes differ by at most one. If $k exceeds the item count the
     * trailing folds are empty.
     *
     * @param array $data Items to partition.
     * @param int   $k    Number of folds; must be at least 1.
     *
     * @return array List of $k folds, each a zero-indexed list of items.
     *
     * @throws \InvalidArgumentException When $k is smaller than 1.
     */
    public function kFoldSplit($data, $k)
    {
        $foldCount = (int) $k;
        if ($foldCount < 1) {
            throw new \InvalidArgumentException('The number of folds ($k) must be at least 1.');
        }

        $datasetSize = count($data);
        $baseSize = intdiv($datasetSize, $foldCount);
        $remainder = $datasetSize % $foldCount;

        $folds = [];
        $offset = 0;

        for ($i = 0; $i < $foldCount; $i++) {
            // Spread the leftover items over the leading folds instead of dropping them.
            $size = $baseSize + ($i < $remainder ? 1 : 0);

            // Slice by position so arrays with gaps or string keys work too;
            // array_values() keeps each fold a plain zero-indexed list.
            $folds[] = array_values(array_slice($data, $offset, $size));
            $offset += $size;
        }

        return $folds;
    }

    /**
     * Draws $iterations random items from the data and returns the distinct ones.
     *
     * Each draw picks one item uniformly at random using mt_rand(), so a run
     * can be made reproducible by seeding with mt_srand() beforehand. An item
     * that loosely equals (==) one already collected is skipped, so the
     * result holds at most min($iterations, count($data)) items.
     *
     * NOTE(review): dropping repeated draws means this is not a classic
     * bootstrap sample (sampling with replacement keeps duplicates). The
     * de-duplication is kept because callers may rely on it; confirm intent.
     *
     * @param array $data       Items to sample from.
     * @param int   $iterations Number of random draws to perform.
     *
     * @return array List of distinct sampled items, in the order first drawn.
     */
    public function bootstrapping($data, $iterations)
    {
        $sampleSize = count($data);
        $bootstrapSamples = [];

        // Nothing to draw from: mt_rand(0, -1) would be an invalid range.
        if ($sampleSize === 0) {
            return $bootstrapSamples;
        }

        // Re-index so that a random offset always maps to an existing item,
        // even when the input has gaps or string keys.
        $items = array_values($data);

        for ($i = 0; $i < $iterations; $i++) {
            // Pick one item at random from the original data.
            $sampleIndex = mt_rand(0, $sampleSize - 1);
            $bootstrapSample = $items[$sampleIndex];

            // Add the item to the result unless an equal one is already present.
            if (!in_array($bootstrapSample, $bootstrapSamples)) {
                $bootstrapSamples[] = $bootstrapSample;
            }
        }

        // Return the collected samples.
        return $bootstrapSamples;
    }
}

